library(ggplot2)
library(gridExtra)
library(grid)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stringr)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Load the raw flight records and the airport lookup table. Both paths are
# relative to the current working directory.
delay <- read.csv(file = "data/ABIA.csv")
airport <- read.csv(file = "data/airport.csv")

# Per-column summary statistics, as a quick sanity check of the import.
summary(delay)
##       Year          Month         DayofMonth      DayOfWeek    
##  Min.   :2008   Min.   : 1.00   Min.   : 1.00   Min.   :1.000  
##  1st Qu.:2008   1st Qu.: 3.00   1st Qu.: 8.00   1st Qu.:2.000  
##  Median :2008   Median : 6.00   Median :16.00   Median :4.000  
##  Mean   :2008   Mean   : 6.29   Mean   :15.73   Mean   :3.902  
##  3rd Qu.:2008   3rd Qu.: 9.00   3rd Qu.:23.00   3rd Qu.:6.000  
##  Max.   :2008   Max.   :12.00   Max.   :31.00   Max.   :7.000  
##                                                                
##     DepTime       CRSDepTime      ArrTime       CRSArrTime  
##  Min.   :   1   Min.   :  55   Min.   :   1   Min.   :   5  
##  1st Qu.: 917   1st Qu.: 915   1st Qu.:1107   1st Qu.:1115  
##  Median :1329   Median :1320   Median :1531   Median :1535  
##  Mean   :1329   Mean   :1320   Mean   :1487   Mean   :1505  
##  3rd Qu.:1728   3rd Qu.:1720   3rd Qu.:1903   3rd Qu.:1902  
##  Max.   :2400   Max.   :2346   Max.   :2400   Max.   :2400  
##  NA's   :1413                  NA's   :1567                 
##  UniqueCarrier     FlightNum       TailNum      ActualElapsedTime
##  WN     :34876   Min.   :   1          : 1104   Min.   : 22.0    
##  AA     :19995   1st Qu.: 640   N678CA :  195   1st Qu.: 57.0    
##  CO     : 9230   Median :1465   N511SW :  180   Median :125.0    
##  YV     : 4994   Mean   :1917   N526SW :  176   Mean   :120.2    
##  B6     : 4798   3rd Qu.:2653   N528SW :  172   3rd Qu.:164.0    
##  XE     : 4618   Max.   :9741   N520SW :  168   Max.   :506.0    
##  (Other):20749                  (Other):97265   NA's   :1601     
##  CRSElapsedTime     AirTime          ArrDelay           DepDelay      
##  Min.   : 17.0   Min.   :  3.00   Min.   :-129.000   Min.   :-42.000  
##  1st Qu.: 58.0   1st Qu.: 38.00   1st Qu.:  -9.000   1st Qu.: -4.000  
##  Median :130.0   Median :105.00   Median :  -2.000   Median :  0.000  
##  Mean   :122.1   Mean   : 99.81   Mean   :   7.065   Mean   :  9.171  
##  3rd Qu.:165.0   3rd Qu.:142.00   3rd Qu.:  10.000   3rd Qu.:  8.000  
##  Max.   :320.0   Max.   :402.00   Max.   : 948.000   Max.   :875.000  
##  NA's   :11      NA's   :1601     NA's   :1601       NA's   :1413     
##      Origin           Dest          Distance        TaxiIn       
##  AUS    :49623   AUS    :49637   Min.   :  66   Min.   :  0.000  
##  DAL    : 5583   DAL    : 5573   1st Qu.: 190   1st Qu.:  4.000  
##  DFW    : 5508   DFW    : 5506   Median : 775   Median :  5.000  
##  IAH    : 3704   IAH    : 3691   Mean   : 705   Mean   :  6.413  
##  PHX    : 2786   PHX    : 2783   3rd Qu.:1085   3rd Qu.:  7.000  
##  DEN    : 2719   DEN    : 2673   Max.   :1770   Max.   :143.000  
##  (Other):29337   (Other):29397                  NA's   :1567     
##     TaxiOut         Cancelled       CancellationCode    Diverted       
##  Min.   :  1.00   Min.   :0.00000    :97840          Min.   :0.000000  
##  1st Qu.:  9.00   1st Qu.:0.00000   A:  719          1st Qu.:0.000000  
##  Median : 12.00   Median :0.00000   B:  605          Median :0.000000  
##  Mean   : 13.96   Mean   :0.01431   C:   96          Mean   :0.001824  
##  3rd Qu.: 16.00   3rd Qu.:0.00000                    3rd Qu.:0.000000  
##  Max.   :305.00   Max.   :1.00000                    Max.   :1.000000  
##  NA's   :1419                                                          
##   CarrierDelay     WeatherDelay       NASDelay      SecurityDelay   
##  Min.   :  0.00   Min.   :  0.00   Min.   :  0.00   Min.   :  0.00  
##  1st Qu.:  0.00   1st Qu.:  0.00   1st Qu.:  0.00   1st Qu.:  0.00  
##  Median :  0.00   Median :  0.00   Median :  2.00   Median :  0.00  
##  Mean   : 15.39   Mean   :  2.24   Mean   : 12.47   Mean   :  0.07  
##  3rd Qu.: 16.00   3rd Qu.:  0.00   3rd Qu.: 16.00   3rd Qu.:  0.00  
##  Max.   :875.00   Max.   :412.00   Max.   :367.00   Max.   :199.00  
##  NA's   :79513    NA's   :79513    NA's   :79513    NA's   :79513   
##  LateAircraftDelay
##  Min.   :  0.00   
##  1st Qu.:  0.00   
##  Median :  6.00   
##  Mean   : 22.97   
##  3rd Qu.: 30.00   
##  Max.   :458.00   
##  NA's   :79513
# ---- Data preparation --------------------------------------------------
# The former attach(delay) call was removed: every column below is reached
# through `delay$...` / `departure$...`, and attach() only put a stale copy
# of the columns on the search path (it would not see CRSDepHr added below).

# Scheduled departure hour ("00".."23") as a factor. CRSDepTime is an HHMM
# integer, so it is left-padded to four characters before parsing
# (e.g. 55 -> "0055" -> hour "00").
crs_hhmm <- str_pad(delay$CRSDepTime, 4, side = "left", pad = "0")
delay$CRSDepHr <- as.factor(format(strptime(crs_hhmm, "%H%M"), "%H"))

# Keep flights that leave AUS with a recorded, strictly positive departure
# delay. which() drops any row where the condition evaluates to NA.
late_from_aus <- which(
  delay$Origin == "AUS" & !is.na(delay$DepDelay) & delay$DepDelay > 0
)
departure <- delay[late_from_aus, ]

# Calendar fields are categories for grouping and plotting, not quantities.
departure$Month <- as.factor(departure$Month)
departure$DayofMonth <- as.factor(departure$DayofMonth)
departure$DayOfWeek <- as.factor(departure$DayOfWeek)

# Group means of the departure delay, repeated on every row of the group.
departure$HrWkAvg <- ave(
  departure$DepDelay, departure$DayOfWeek, departure$CRSDepHr
)
departure$DoMAvg <- ave(
  departure$DepDelay, departure$Month, departure$DayofMonth
)
departure$DestAvg <- ave(departure$DepDelay, departure$Dest)
departure$CarrierDAvg <- ave(
  departure$DepDelay, departure$DayOfWeek, departure$UniqueCarrier
)

# Group sizes (number of delayed departures), repeated on every row of the
# group. Counting over a row index avoids the character round-trip the
# counts previously went through; the grouping keys are unchanged.
row_id <- seq_len(nrow(departure))
departure$HrCnt <- as.numeric(ave(
  row_id,
  as.character(departure$DayOfWeek), as.character(departure$CRSDepHr),
  FUN = length
))
departure$DoMCnt <- as.numeric(ave(
  row_id,
  as.character(departure$Month), as.character(departure$DayofMonth),
  FUN = length
))
departure$DestCnt <- as.numeric(ave(
  row_id,
  as.character(departure$Dest),
  FUN = length
))
departure$CarrierMCnt <- as.numeric(ave(
  row_id,
  as.character(departure$Month), as.character(departure$UniqueCarrier),
  FUN = length
))

# Attach the airport table's columns to each flight by destination code.
# NOTE(review): merge() defaults to an inner join, so flights whose Dest has
# no row in `airport` are dropped here -- confirm that is intended.
departure <- merge(departure, airport, by.x = "Dest", by.y = "airport")

# Week: average delay by scheduled departure hour, one line per day of week,
# split into two panels (days 1-4 and days 5-7).
# NOTE(review): the panel titles assume DayOfWeek 1 = Monday -- confirm.
early_week_days <- c("1", "2", "3", "4")
late_week_days <- c("5", "6", "7")

MtoTH <- departure[departure$DayOfWeek %in% early_week_days, ]
FtoS <- departure[departure$DayOfWeek %in% late_week_days, ]

# Aesthetic mapping and layers shared by both weekly panels.
hourly_mapping <- aes(
  x = CRSDepHr, y = HrWkAvg, group = DayOfWeek, color = DayOfWeek
)
hourly_layers <- function() {
  list(
    geom_point(),
    geom_line(size = 1),
    ylim(0, 120),
    xlab("Time of Day"),
    ylab("Average delay in minutes")
  )
}

week1 <- ggplot(MtoTH, hourly_mapping) +
  hourly_layers() +
  ggtitle("Flight delay in Monday to Thursday")

week2 <- ggplot(FtoS, hourly_mapping) +
  hourly_layers() +
  ggtitle("Flight delay Friday to Sunday")

# Show the two panels side by side.
grid.arrange(week1, week2, ncol = 2)

# Season and Month: average delay by day of month, one line per month, with
# the year split into four three-month "seasons" (calendar quarters).
season1 <- departure[departure$Month %in% c("1", "2", "3"), ]
season2 <- departure[departure$Month %in% c("4", "5", "6"), ]
season3 <- departure[departure$Month %in% c("7", "8", "9"), ]
season4 <- departure[departure$Month %in% c("10", "11", "12"), ]

# Aesthetic mapping and layers shared by all four seasonal panels.
daily_mapping <- aes(
  x = DayofMonth, y = DoMAvg, group = Month, color = Month
)
daily_layers <- function() {
  list(
    geom_point(),
    geom_line(size = 1),
    ylim(0, 100),
    xlab("Day of Month"),
    ylab("Average delay in minutes")
  )
}

# Titles for seasons 1 and 2 previously read "seaon"; fixed to match 3 and 4.
s1 <- ggplot(season1, daily_mapping) +
  daily_layers() +
  ggtitle("Flight delay in season 1")

s2 <- ggplot(season2, daily_mapping) +
  daily_layers() +
  ggtitle("Flight delay in season 2")

s3 <- ggplot(season3, daily_mapping) +
  daily_layers() +
  ggtitle("Flight delay in season 3")

s4 <- ggplot(season4, daily_mapping) +
  daily_layers() +
  ggtitle("Flight delay in season 4")

# Arrange the four seasons in a 2 x 2 grid.
grid.arrange(s1, s2, s3, s4, nrow = 2, ncol = 2)

# Airline: average delay by day of week, one line per carrier, with the
# carriers split into two hand-picked groups so each panel stays readable.
airlinelarge <- c("WN", "AA", "CO", "YV", "B6", "XE", "OO", "OH")
airlinesmall <- c("MQ", "9E", "DL", "F9", "UA", "US", "EV", "NW")

airlarge <- departure[departure$UniqueCarrier %in% airlinelarge, ]
airsmall <- departure[departure$UniqueCarrier %in% airlinesmall, ]

# Aesthetic mapping and layers shared by both carrier panels. The x axis is
# DayOfWeek in both, so both panels get the same 'Day of Week' label (the
# second panel used to be mislabelled 'Time of Day').
carrier_mapping <- aes(
  x = DayOfWeek, y = CarrierDAvg,
  group = UniqueCarrier, color = UniqueCarrier
)
carrier_layers <- function() {
  list(
    geom_point(),
    geom_line(size = 1),
    ylim(0, 120),
    xlab("Day of Week"),
    ylab("Average delay in minutes")
  )
}

air1 <- ggplot(airlarge, carrier_mapping) +
  carrier_layers() +
  ggtitle("Flight delay of Major Airlines")

air2 <- ggplot(airsmall, carrier_mapping) +
  carrier_layers() +
  ggtitle("Flight delay of Minor Airlines")

# Show the two carrier groups side by side.
grid.arrange(air1, air2, ncol = 2)

# Dest: per-destination average delay, one panel per group of destination
# airports. The group abbreviations look like US regional divisions
# (NE = Northeast, MW = Midwest, SA = South Atlantic, ESC/WSC = East/West
# South Central, M = Mountain, P = Pacific).
NE <- c("BOS", "EWR", "JFK", "PHL")
MW <- c("MDW", "ORD", "IND", "DSM", "MSP", "MCI", "STL", "CLE", "CVG", "DTW")
SA <- c("FLL", "JAX", "MCO", "TPA", "ATL", "BWI", "CLT", "RDU", "IAD", "ORF")
ESC <- c("BNA", "MEM")
WSC <- c(
  "MSY", "OKC", "TUL", "DAL", "DFW", "ELP", "HOU", "HRL", "IAH", "LBB", "MAF"
)
M <- c("PHX", "TUS", "DEN", "LAS", "ABQ", "SLC")
P <- c("LAX", "LGB", "OAK", "ONT", "SAN", "SFO", "SJC", "SNA", "SEA")

NEdep <- departure[departure$Dest %in% NE, ]
MWdep <- departure[departure$Dest %in% MW, ]
SAdep <- departure[departure$Dest %in% SA, ]
ESCdep <- departure[departure$Dest %in% ESC, ]
WSCdep <- departure[departure$Dest %in% WSC, ]
Mdep <- departure[departure$Dest %in% M, ]
Pdep <- departure[departure$Dest %in% P, ]

# Aesthetic mapping and layers shared by all regional panels. The x axis is
# Month, so it is labelled 'Month' (it used to carry a copy-pasted
# 'Time of Day' label).
# NOTE(review): DestAvg is a per-destination mean (it does not vary by
# month), so every line in these panels is horizontal. If a monthly trend is
# wanted, plot a Dest x Month average instead.
dest_mapping <- aes(x = Month, y = DestAvg, group = Dest, color = Dest)
dest_layers <- function() {
  list(
    geom_point(),
    geom_line(size = 0.5),
    ylim(0, 120),
    xlab("Month"),
    ylab("Average delay in minutes")
  )
}

# Each panel now names its own region; all seven used to share the
# copy-pasted title 'Flight delay Friday to Sunday'.
# NOTE(review): none of these plot objects is printed or arranged in the
# visible part of the script.
NEp <- ggplot(NEdep, dest_mapping) +
  dest_layers() +
  ggtitle("Flight delay by destination: Northeast")

MWp <- ggplot(MWdep, dest_mapping) +
  dest_layers() +
  ggtitle("Flight delay by destination: Midwest")

SAp <- ggplot(SAdep, dest_mapping) +
  dest_layers() +
  ggtitle("Flight delay by destination: South Atlantic")

ESCp <- ggplot(ESCdep, dest_mapping) +
  dest_layers() +
  ggtitle("Flight delay by destination: East South Central")

WSCp <- ggplot(WSCdep, dest_mapping) +
  dest_layers() +
  ggtitle("Flight delay by destination: West South Central")

Mp <- ggplot(Mdep, dest_mapping) +
  dest_layers() +
  ggtitle("Flight delay by destination: Mountain")

Pp <- ggplot(Pdep, dest_mapping) +
  dest_layers() +
  ggtitle("Flight delay by destination: Pacific")


# destination: bubble map of the average departure delay per destination.
#
# One row per destination: code, mean delay, number of delayed departures and
# the coordinates merged in from the airport table.
# (A dangling `departure = ` used to precede this statement; R parsed it as
# the chained assignment `departure = dest = ...`, which silently replaced
# `departure` with this 5-column subset. It has been removed.)
dest <- unique(departure[, c("Dest", "DestAvg", "DestCnt", "lon", "lat")])

# Geo layout: US-only map with grey land and white state/country borders.
g <- list(
  scope = "usa",
  projection = list(type = "albers usa"),
  showland = TRUE,
  landcolor = toRGB("gray85"),
  subunitwidth = 1,
  countrywidth = 1,
  subunitcolor = toRGB("white"),
  countrycolor = toRGB("white")
)

# Text labels plus markers whose size and colour both encode the mean delay.
# Formulas refer to columns of `dest` directly instead of `dest$...`, so they
# are evaluated against the data plotly was given.
p <- plot_geo(
  dest,
  locationmode = "USA-states", sizes = c(1, 250), text = ~Dest
) %>%
  add_text(x = ~lon, y = ~lat, textposition = "top") %>%
  add_markers(
    x = ~lon, y = ~lat, size = ~DestAvg, color = ~DestAvg,
    hoverinfo = "text",
    text = ~paste(Dest, "<br />", "Average Delay", DestAvg)
  ) %>%
  layout(
    title = "2008 Austin Departure Flights Average Delay by Destination",
    geo = g
  )

# Build the figure, strip the hoverinfo setting from every trace, then wrap
# the result back into an htmlwidget.
# NOTE(review): this undoes the hoverinfo = "text" set above -- confirm the
# override is still wanted.
Map_1 <- plotly_build(p)$x
l <- length(Map_1$data)
for (i in seq_len(l)) { # seq_len() is a no-op when there are no traces
  Map_1$data[[i]]$hoverinfo <- NULL
}
Map_1 <- as_widget(Map_1)
Map_1